library(tidyverse)
library(magrittr)
library(data.table)
library(lubridate)
library(hms)
library(ggthemes)
theme_set(theme_few())

### SET WORKING DIRECTORY HERE ###
path_to_archive <- "replication/"
data_dir <- paste0(path_to_archive, "data/")
setwd(data_dir)
plot_dir <- paste0(path_to_archive, "plots/")
tables_dir <- paste0(path_to_archive, "tables/")

### FUNCTION DEFINITIONS FOR WEIGHTING SCHEMES
diff2 <- function(x) x[seq(1,length(x)-1,2)] - x[seq(2,length(x),2)]

inv_var_weight <- function(i,d) {
	weighted.mean(d[i, est], w=sqrt(d[i, n_t] * d[i, n_c]), na.rm=T)
}

total_size_weight <- function(i, d) {
	weighted.mean(d[i,est], w=d[i,n_t] + d[i,n_c], na.rm=T)
}

simple_avg <- function(i,d){
	mean(d[i,est], na.rm=T)
}



dd <- readRDS(paste0(data_dir, "all_dd_data_exposure.rds"))

dd <- dd[,group:=ifelse(group=="T","T","C")] %>%
		.[,.(exp_diff = weighted.mean(exp_diff, n, na.rm=T), n=sum(n, na.rm=T)), by=.(dma_code, channel, affiliate, program, ad_time_utc,group)] %>%
		.[,.(est = exp_diff[group=="T"] - exp_diff[group=="C"], n_t = n[group=="T"], n_c = n[group=="C"]), by=.(dma_code, channel, affiliate, program, ad_time_utc)] %>%
		.[!is.na(est)]		

# full sample point estimate, bootstrap
pt_est_full <- inv_var_weight(1:nrow(dd),dd)
set.seed(5648933)
bs_full <- map_dbl(1:1000, ~ inv_var_weight(sample.int(nrow(dd), replace=T), dd))
results_full <- data.table(replicate = 0:1000, estimate = c(pt_est_full, bs_full))

pt_est_full_simple <- simple_avg(1:nrow(dd),dd)
set.seed(5648933)
bs_full_simple <- map_dbl(1:1000, ~ simple_avg(sample.int(nrow(dd), replace=T), dd))
results_full_simple <- data.table(replicate = 0:1000, estimate = c(pt_est_full_simple, bs_full_simple))

pt_est_full_size <- total_size_weight(1:nrow(dd),dd)
set.seed(5648933)
bs_full_size <- map_dbl(1:1000, ~ total_size_weight(sample.int(nrow(dd), replace=T), dd))
results_full_size <- data.table(replicate = 0:1000, estimate = c(pt_est_full_size, bs_full_size))


# by days to election

dd[,`:=` (days_to_election = as.numeric(mdy("11-06-2012") - date(ad_time_utc)))]

pt_est_daily <- dd[,.(estimate = inv_var_weight(1:.N, .SD)),by=.(days_to_election)] %>% 
	setkey(days_to_election) %>% 
	.[,replicate:=0]

pt_est_daily_simple <- dd[,.(estimate = simple_avg(1:.N, .SD)),by=.(days_to_election)] %>% 
	setkey(days_to_election) %>% 
	.[,replicate:=0]

pt_est_daily_size <- dd[,.(estimate = total_size_weight(1:.N, .SD)),by=.(days_to_election)] %>% 
	setkey(days_to_election) %>% 
	.[,replicate:=0]

set.seed(389494)
bs_daily <- dd[,.(replicate = 1:1000, 
				  estimate = map_dbl(1:1000, ~ inv_var_weight(sample.int(.N, replace=T), .SD))),
				by=.(days_to_election)] 

results_daily <- rbind(pt_est_daily, bs_daily) %>%
		setkey(days_to_election, replicate)

set.seed(389494)
bs_daily_simple <- dd[,.(replicate = 1:1000, 
				  estimate = map_dbl(1:1000, ~ simple_avg(sample.int(.N, replace=T), .SD))),
				by=.(days_to_election)] 

results_daily_simple <- rbind(pt_est_daily_simple, bs_daily_simple) %>%
		setkey(days_to_election, replicate)

set.seed(389494)
bs_daily_size <- dd[,.(replicate = 1:1000, 
				  estimate = map_dbl(1:1000, ~ total_size_weight(sample.int(.N, replace=T), .SD))),
				by=.(days_to_election)] 

results_daily_size <- rbind(pt_est_daily_size, bs_daily_size) %>%
		setkey(days_to_election, replicate)



save(results_full,results_daily,file=paste0(data_dir, "dd_bootstraps_exposure.RData"))
save(results_full_simple,results_daily_simple,file=paste0(data_dir, "dd_bootstraps_exposure_simple_wgt.RData"))
save(results_full_size,results_daily_size,file=paste0(data_dir, "dd_bootstraps_exposure_size_wgt.RData"))

# load(file=paste0(data_dir, "dd_bootstraps.RData"))
# load(file=paste0(data_dir, "dd_bootstraps_simple_wgt.RData"))
# load(file=paste0(data_dir, "dd_bootstraps_size_wgt.RData"))

### confidence intervals ###


quantiles <- c(0.01,0.025,0.975,0.99)

ci_full <- results_full[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_")))]
ci_daily <- results_daily[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_"))), by =.(days_to_election)]


ci_full_simple <- results_full_simple[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_")))]
ci_daily_simple <- results_daily_simple[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_"))), by =.(days_to_election)]

ci_full_size <- results_full_size[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_")))]
ci_daily_size <- results_daily_size[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_"))), by =.(days_to_election)]


### TABLE 3(B)

library(stargazer)
y <- rnorm(100)
x <- matrix(rnorm(100*nrow(ci_full)), nrow=100)

fake_model <- lm(y ~ x - 1)



dd_bootstrap_cis <- stargazer(list(fake_model, fake_model, fake_model),
		  title="Average Effects and Bootstrapped Confidence Intervals, by Subgroup.",
		  style="apsr",
		  out=paste0(tables_dir, "dd_bootstrap_exposure_cis.tex"),
		  label="tab:dd_boostrap_cis",
		  coef=list(ci_full[,point_est], ci_full_size[,point_est], ci_full_simple[,point_est]), 
		  ci=T,
		  ci.custom=list(ci_full[, .(q_025, q_975)] %>% as.matrix,
		  				 ci_full_size[, .(q_025, q_975)] %>% as.matrix,
		  				 ci_full_simple[, .(q_025, q_975)] %>% as.matrix),
		  star.cutoffs = NA,
		  omit.stat = "all",
		  column.labels = c("Inv. Variance", "Total Devices", "Equally Weighted"),
		  dep.var.labels = "Effect (Additional Ads)",
		  covariate.labels = "Treated",
		  notes="\\parbox[t]{0.7\\linewidth}{Table reports average effects, weighted by 1) the geometric mean of treatment and control group size, 2) total devices in treatment and control groups and 3) a simple equally weighted average. Confidence intervals are the central 95\\% interval of 1000 bootstrap replicates within each subgroup.}",
		  notes.append=F
		  )

